
* Session setup: disable output paging and start from an empty workspace.
set more off
clear all

* Root folder used for every input and output path in this script.
global dir "/Volumes/Zihao_SSD2/PatentsView"

*** ===============================================================================================================
*** Table B22. Exclude self citations
*** Zihao Li. 11/2024
*** ===============================================================================================================

* Load the pair-level panel and keep only the identifiers, the outcome
* (omission) and the similarity score.
import delimited using "$dir/temp/omission_panel5_noselfcite.csv", clear
keep patent_id cited_patent_id omission sim_score

* Suffix convention used from here on: _i = citing patent, _j = cited patent.
rename patent_id       patent_id_i
rename cited_patent_id patent_id_j

*** Merge with citing patents (i) and cited patents (j)
* keep(match) retains only pairs found in the using file and nogenerate
* suppresses the _merge indicator, so nothing has to be dropped afterwards.
merge m:1 patent_id_i using "$dir/temp/patent_i.dta", keep(match) nogenerate
merge m:1 patent_id_j using "$dir/temp/patent_j.dta", keep(match) nogenerate

sort patent_id_i patent_id_j

*** Generate regression variables
* Pairwise "same X" indicators. Each one is 1 only when both sides are observed
* and equal, and 0 otherwise (never missing). The !missing() guard matters
* because Stata treats two missing values as equal: without it, two patents
* that both lack an assignee or a CPC subclass would be coded as a match.
* missing() accepts string and numeric arguments alike.

* same main_cpc_subclass
gen byte same_main_cpc = (main_cpc_subclass_i == main_cpc_subclass_j) & !missing(main_cpc_subclass_i, main_cpc_subclass_j)

* same assignee_country
gen byte same_assignee_country = (assignee_country_i == assignee_country_j) & !missing(assignee_country_i, assignee_country_j)

* same assignee_location
gen byte same_assignee_location = (assignee_location_id_i == assignee_location_id_j) & !missing(assignee_location_id_i, assignee_location_id_j)

* same assignee
gen byte same_assignee = (assignee_id_i == assignee_id_j) & !missing(assignee_id_i, assignee_id_j)

* years lag between citing patent and cited patent
gen years_lag = patent_year_i - patent_year_j

* KPSS commercial value (quality) variable (convert to dollar)
* Scale xi_real by 1,000,000 for both sides of the pair, then take the
* natural log. Two passes keep the new variables in the order: both dollar
* values first, then both logs.
foreach side in i j {
    generate xi_dollar_real_`side' = 1000000 * xi_real_`side'
}
foreach side in i j {
    generate dollar_real_log_`side' = ln(xi_dollar_real_`side')
}

* Remove the gender/race list variables; nothing below refers to them.
drop gender_09_100_list* gender_io_09_100_list* gender_09_50_list* ///
     gender_08_100_list* gender_08_50_list* ///
     gender_io_05_100_list* gender_io_06_100_list* gender_io_07_100_list* ///
     gender_io_08_100_list* race80_list*

* Exclude pairs whose similarity score is exactly 1.
drop if sim_score == 1

* Generate gender interaction variables
* Same-measure interactions: citing (i) x cited (j).
foreach g in allfemale leadfemale {
    generate `g'_09_100_ji = `g'_09_100_i * `g'_09_100_j
}
* Cross-measure interactions; the first word of the name is the cited-side (j) measure.
generate allfemale_leadfemale_09_100_ji = leadfemale_09_100_i * allfemale_09_100_j
generate leadfemale_allfemale_09_100_ji = allfemale_09_100_i * leadfemale_09_100_j

* Numeric group identifiers for the fixed effects absorbed in the regressions.
* Assignee ids can take a very large number of distinct values. encode stores
* each distinct string as a value label and fails once there are more than
* 65,536 of them, so the firm identifiers are built with egen group() instead.
* Observations with an empty id get a missing group, as they would with encode.
egen long assignee_id_i_enc = group(assignee_id_i)
egen long assignee_id_j_enc = group(assignee_id_j)
* NOTE(review): CPC sections are presumably low-cardinality, so encode (which
* also keeps readable value labels) is retained for them - confirm.
encode main_cpc_section_i, gen(main_cpc_section_i_enc)
encode main_cpc_section_j, gen(main_cpc_section_j_enc)
* Drop pairs without a citing-patent year or without a CPC section on either
* side; missing() also catches extended missing codes (.a-.z).
drop if missing(patent_year_i) | main_cpc_section_i=="" | main_cpc_section_j==""


*** Run Regression
* All eight columns regress omission on a set of gender regressors plus the
* same controls, absorb the same five fixed effects, cluster by citing patent,
* and are written to the same outreg2 table (column 1 replaces the file, the
* rest append). The shared pieces are stored once in locals; run this whole
* section together so the locals are in scope.
local tail_controls dollar_real_log_j num_citations_i num_inventors_i num_inventors_j avg_experience_i avg_experience_j same_main_cpc same_assignee_country years_lag
local controls sim_score `tail_controls'
local fe_vars assignee_id_i_enc assignee_id_j_enc patent_year_i main_cpc_section_i_enc main_cpc_section_j_enc
local fe_rows "Firm i FE, Yes, Firm j FE, Yes, Year i FE, Yes, Main CPC Section i FE, Yes, Main CPC Section j FE, Yes, Cluster SE, Citing"
local ymean_stat "Mean of dependent variable, e(ymean)"
local outfile $dir/reg_results/tableb22.doc
* Column (4) sample: both the cited and the citing patent are flagged either
* all-female or all-male.
local same_gender_teams "((allfemale_09_100_j==1)|(allmale_09_100_j==1)) & ((allfemale_09_100_i==1)|(allmale_09_100_i==1))"

* Col (1)
local x allfemale_09_100_j
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', replace dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (2)
local x allfemale_09_100_i
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (3)
* keep() for this column lists sim_score right after the first regressor.
local x allfemale_09_100_j allfemale_09_100_i allfemale_09_100_ji
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(allfemale_09_100_j sim_score allfemale_09_100_i allfemale_09_100_ji `tail_controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (4)
local x allfemale_09_100_j allfemale_09_100_i allfemale_09_100_ji
reghdfe omission `x' `controls' if `same_gender_teams', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, Homo Gender) addstat(`ymean_stat')

* Col (5)
local x leadfemale_09_100_j
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (6)
local x leadfemale_09_100_j leadfemale_09_100_i leadfemale_09_100_ji
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (7)
local x allfemale_09_100_j leadfemale_09_100_i allfemale_leadfemale_09_100_ji
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

* Col (8)
local x allfemale_09_100_i leadfemale_09_100_j leadfemale_allfemale_09_100_ji
reghdfe omission `x' `controls', ///
    absorb(`fe_vars') vce(cluster patent_id_i)
estadd ysumm
outreg2 using `outfile', append dec(4) ///
    keep(`x' `controls') ///
    addtext(`fe_rows', Sample, All) addstat(`ymean_stat')

